{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 寻找特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "data_folder = os.path.join(\".\", \"data\")\n",
    "adult_filename = os.path.join(data_folder, \"adult.data\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "adult = pd.read_csv(adult_filename, header=None, names=[\"Age\", \"Work-Class\", \"fnlwgt\", \"Education\",\n",
    "                                                        \"Education-Num\", \"Marital-Status\", \"Occupation\",\n",
    "                                                        \"Relationship\", \"Race\", \"Sex\", \"Capital-gain\",\n",
    "                                                        \"Capital-loss\", \"Hours-per-week\", \"Native-Country\",\n",
    "                                                        \"Earnings-Raw\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['Age', 'Work-Class', 'fnlwgt', 'Education', 'Education-Num',\n",
       "       'Marital-Status', 'Occupation', 'Relationship', 'Race', 'Sex',\n",
       "       'Capital-gain', 'Capital-loss', 'Hours-per-week', 'Native-Country',\n",
       "       'Earnings-Raw'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adult.dropna(how='all', inplace=True)\n",
    "adult.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    32561.000000\n",
       "mean        40.437456\n",
       "std         12.347429\n",
       "min          1.000000\n",
       "25%         40.000000\n",
       "50%         40.000000\n",
       "75%         45.000000\n",
       "max         99.000000\n",
       "Name: Hours-per-week, dtype: float64"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adult[\"Hours-per-week\"].describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10.0"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adult[\"Education-Num\"].median()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([' State-gov', ' Self-emp-not-inc', ' Private', ' Federal-gov',\n",
       "       ' Local-gov', ' ?', ' Self-emp-inc', ' Without-pay', ' Never-worked'], dtype=object)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "adult[\"Work-Class\"].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0,  1,  2],\n",
       "       [ 3,  4,  5],\n",
       "       [ 6,  7,  8],\n",
       "       [ 9, 10, 11],\n",
       "       [12, 13, 14],\n",
       "       [15, 16, 17],\n",
       "       [18, 19, 20],\n",
       "       [21, 22, 23],\n",
       "       [24, 25, 26],\n",
       "       [27, 28, 29]])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "X = np.arange(30).reshape((10, 3))\n",
    "X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0,  1,  2],\n",
       "       [ 3,  1,  5],\n",
       "       [ 6,  1,  8],\n",
       "       [ 9,  1, 11],\n",
       "       [12,  1, 14],\n",
       "       [15,  1, 17],\n",
       "       [18,  1, 20],\n",
       "       [21,  1, 23],\n",
       "       [24,  1, 26],\n",
       "       [27,  1, 29]])"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X[:,1] = 1\n",
    "X"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 0  2]\n",
      " [ 3  5]\n",
      " [ 6  8]\n",
      " [ 9 11]\n",
      " [12 14]\n",
      " [15 17]\n",
      " [18 20]\n",
      " [21 23]\n",
      " [24 26]\n",
      " [27 29]]\n",
      "[ 74.25   0.    74.25]\n"
     ]
    }
   ],
   "source": [
    "from sklearn.feature_selection import VarianceThreshold\n",
    "\n",
    "vt = VarianceThreshold()\n",
    "Xt = vt.fit_transform(X)\n",
    "print(Xt)\n",
    "print(vt.variances_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = adult[[\"Age\", \"Education-Num\", \"Capital-gain\", \"Capital-loss\", \"Hours-per-week\"]].values\n",
    "y = (adult[\"Earnings-Raw\"] == ' >50K').values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[   39    13  2174     0    40]\n",
      " [   50    13     0     0    13]\n",
      " [   38     9     0     0    40]\n",
      " ..., \n",
      " [   58     9     0     0    40]\n",
      " [   22     9     0     0    20]\n",
      " [   52     9 15024     0    40]]\n",
      "[  8.60061182e+03   2.40142178e+03   8.21924671e+07   1.37214589e+06\n",
      "   6.47640900e+03]\n"
     ]
    }
   ],
   "source": [
    "from sklearn.feature_selection import SelectKBest\n",
    "from sklearn.feature_selection import chi2\n",
    "print(X)\n",
    "transformer = SelectKBest(score_func=chi2, k=3)\n",
    "Xt_chi2 = transformer.fit_transform(X, y)\n",
    "print(transformer.scores_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy.stats import pearsonr\n",
    "\n",
    "def multivariate_pearsonr(X, y):\n",
    "    scores, pvalues = [], []\n",
    "    for column in range(X.shape[1]):\n",
    "        cur_score, cur_p = pearsonr(X[:,column], y)\n",
    "        scores.append(abs(cur_score))\n",
    "        pvalues.append(cur_p)\n",
    "    return (np.array(scores), np.array(pvalues))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 0.2340371   0.33515395  0.22332882  0.15052631  0.22968907]\n"
     ]
    }
   ],
   "source": [
    "transformer = SelectKBest(score_func=multivariate_pearsonr, k=3)\n",
    "Xt_pearson = transformer.fit_transform(X, y)\n",
    "print(transformer.scores_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/dlinking-lxy/more-space/pyworks/venv/lib/python3.5/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
      "  \"This module will be removed in 0.20.\", DeprecationWarning)\n"
     ]
    }
   ],
   "source": [
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.cross_validation import cross_val_score\n",
    "clf = DecisionTreeClassifier(random_state=14)\n",
    "scores_chi2 = cross_val_score(clf, Xt_chi2, y, scoring='accuracy')\n",
    "scores_pearson = cross_val_score(clf, Xt_pearson, y, scoring='accuracy')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Chi2 performance: 0.829\n",
      "Pearson performance: 0.771\n"
     ]
    }
   ],
   "source": [
    "print(\"Chi2 performance: {0:.3f}\".format(scores_chi2.mean()))\n",
    "print(\"Pearson performance: {0:.3f}\".format(scores_pearson.mean()))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 创造特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/dlinking-lxy/more-space/pyworks/venv/lib/python3.5/site-packages/IPython/core/interactiveshell.py:2728: DtypeWarning: Columns (3) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  interactivity=interactivity, compiler=compiler, result=result)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    0      1       2     3     4     5     6     7     8     9     ...   1549  \\\n",
      "0  125.0  125.0  1.0000   1.0   0.0   0.0   0.0   0.0   0.0   0.0  ...    0.0   \n",
      "1   57.0  468.0  8.2105   1.0   0.0   0.0   0.0   0.0   0.0   0.0  ...    0.0   \n",
      "2   33.0  230.0  6.9696   1.0   0.0   0.0   0.0   0.0   0.0   0.0  ...    0.0   \n",
      "3   60.0  468.0  7.8000   1.0   0.0   0.0   0.0   0.0   0.0   0.0  ...    0.0   \n",
      "4   60.0  468.0  7.8000   1.0   0.0   0.0   0.0   0.0   0.0   0.0  ...    0.0   \n",
      "\n",
      "   1550  1551  1552  1553  1554  1555  1556  1557  1558  \n",
      "0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   1.0  \n",
      "1   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   1.0  \n",
      "2   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   1.0  \n",
      "3   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   1.0  \n",
      "4   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   1.0  \n",
      "\n",
      "[5 rows x 1559 columns]\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "data_folder = os.path.join(\".\", \"data\")\n",
    "data_filename = os.path.join(data_folder, \"ad.data\")\n",
    "\n",
    "def convert_number(x):\n",
    "    try:\n",
    "        return float(x)\n",
    "    except ValueError:\n",
    "        return np.nan\n",
    "\n",
    "from collections import defaultdict\n",
    "converters = defaultdict(convert_number)  #{i: convert_number for i in range(1558)}\n",
    "converters[1558] = lambda x: 1 if x.strip() == \"ad.\" else 0\n",
    "    \n",
    "ads = pd.read_csv(data_filename, header=None, converters=converters)\n",
    "# ads = ads.applymap(lambda x: np.nan if isinstance(x, str) and not x == \"ad.\" else x)\n",
    "# ads[[0, 1, 2]] = ads[[0, 1, 2]].astype(float)\n",
    "\n",
    "ads = ads.apply(pd.to_numeric, errors='coerce')\n",
    "ads = ads.astype(float).dropna()\n",
    "print(ads[:5])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2359, 1558) (2359,)\n",
      "[[ 125.      125.        1.     ...,    0.        0.        0.    ]\n",
      " [  57.      468.        8.2105 ...,    0.        0.        0.    ]\n",
      " [  33.      230.        6.9696 ...,    0.        0.        0.    ]\n",
      " ..., \n",
      " [ 101.      140.        1.3861 ...,    0.        0.        0.    ]\n",
      " [  23.      120.        5.2173 ...,    0.        0.        0.    ]\n",
      " [  40.       40.        1.     ...,    0.        0.        0.    ]]\n"
     ]
    }
   ],
   "source": [
    "# ads = ads.astype(float).dropna()\n",
    "X = ads.drop(1558, axis=1).values\n",
    "y = ads[1558]\n",
    "print(X.shape,y.shape)\n",
    "print(X)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2359, 5)\n"
     ]
    }
   ],
   "source": [
    "from sklearn.decomposition import PCA\n",
    "pca = PCA(n_components=5)\n",
    "Xd = pca.fit_transform(X)\n",
    "print(Xd.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.854,  0.145,  0.001,  0.   ,  0.   ])"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "np.set_printoptions(precision=3, suppress=True)\n",
    "pca.explained_variance_ratio_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The average score is 0.9334\n"
     ]
    }
   ],
   "source": [
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.cross_validation import cross_val_score\n",
    "\n",
    "clf = DecisionTreeClassifier(random_state=14)\n",
    "scores = cross_val_score(clf, X, y, scoring='accuracy')\n",
    "print(\"The average score is {:.4f}\".format(np.mean(scores)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.092,  0.995,  0.024, ...,  0.   ,  0.   ,  0.   ])"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pca.components_[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The average score from the reduced dataset is 0.9326\n"
     ]
    }
   ],
   "source": [
    "clf = DecisionTreeClassifier(random_state=14)\n",
    "scores_reduced = cross_val_score(clf, Xd, y, scoring='accuracy')\n",
    "print(\"The average score from the reduced dataset is {:.4f}\".format(np.mean(scores_reduced)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAD8CAYAAAB6paOMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJztnX+UXHV58D/Pzu4mGaIJ2UTemGVnsVBLYvxFRGl9Lbqx\nQiygHuXgGWgQz9my0fcNb1sVuq0B326L9bwFfDXRrYCBnYq+VgtoisWI5z2nPaKhYiIgL6lml00R\nwkIisEA2u8/7x72zc2fm3pk7M3dm7sw8n3PumXu/986dZ2bvfp/v9/n1FVXFMAzD6Fy6mi2AYRiG\n0VxMERiGYXQ4pggMwzA6HFMEhmEYHY4pAsMwjA7HFIFhGEaHY4rAMAyjwzFFYBiG0eGYIjAMw+hw\nupstQBhWr16tg4ODzRbDMAyjpXjggQeeVtU15a5rCUUwODjIvn37mi2GYRhGSyEik2GuM9OQYRhG\nhxOJIhCRlSLyTRH5hYg8IiLniMgqEblXRB5zX092rxUR+byIHBSR/SLy5ihkMAzDMKojqhnBTcA9\nqvo7wBuAR4Crgb2qegaw1z0GOB84w92GgV0RyWAYhmFUQc0+AhFZAbwDuBxAVY8Dx0XkIuBc97Ld\nwA+BTwEXAbepU//6R+5sYq2qPlGrLIZhGFEzNzfH9PQ0L730UrNFCWTp0qX09/fT09NT1fujcBaf\nBhwBbhWRNwAPANuBUzyd+6+BU9z9dcDjnvdPu22mCAzDiB3T09O84hWvYHBwEBFptjhFqCozMzNM\nT09z2mmnVXWPKExD3cCbgV2q+ibgBXJmIADc0X9FK+CIyLCI7BORfUeOHIlAzAaQycDgIHR1Oa+Z\nTLMlMgyjRl566SX6+vpiqQQARIS+vr6aZixRKIJpYFpV73ePv4mjGJ4UkbUA7utT7vnDwKme9/e7\nbXmo6riqblLVTWvWlA2DbT6ZDAwPw+QkqDqvw8OmDAyjDYirEshSq3w1KwJV/TXwuIi81m0aAh4G\n7gK2um1bgTvd/buAP3Kjh94GHGsL/8DoKMzO5rfNzjrthmEYMSaqqKH/BmREZD/wRuCvgeuBd4vI\nY8Bm9xhgD/BL4CDw98C2iGRoLlNTlbUbhmFUwD333MNrX/taTj/9dK6//vryb6iASDKLVfVBYJPP\nqSGfaxX4WBSfGysGBhxzkF+7YRhGDczPz/Oxj32Me++9l/7+ft7ylrdw4YUXsn79+kjub5nFUTE2\nBslkflsy6bQbhtE51CFo5Mc//jGnn346r3nNa+jt7eWSSy7hzjvvLP/GkJgiiIp0GsbHIZUCEed1\nfNxpNwyjM6hT0Mjhw4c59dRcjE1/fz+HDxfF2FSNKYIoSafh0CFYWHBeTQkYRmfRokEjpggMwzCi\nok5BI+vWrePxx3N5uNPT06xbt66me3oxRWAYhhEVQcEhNQaNvOUtb+Gxxx7jV7/6FcePH+eOO+7g\nwgsvrOmeXkwRGIZhREWdgka6u7v5whe+wHve8x7OPPNMLr74YjZs2FDTPfPuH9mdDMMwOp2sX3B0\n1DEHDQw4SiACf+GWLVvYsmVLzffxwxSBYRhGlKTTLRcoYqYhwzCMDscUgWEYRodjisAwDKPDMUVg\nGIbR4ZgiMAzD6HBMERiGYcScK664gle96lW87nWvq8v9TREYhmHEnMsvv5x77rmnbvc3RWAYhhEh\nmQMZBm8cpOu6LgZvHCRzoPYy1O94xztYtWpVBNL5YwllhmEYEZE5kGH47mFm55wKpJPHJhm+exiA\n9Mb4JpnZjMAwDCMiRveOLiqBLLNzs4zutTLUhmEYHcHUMf9y00HtccEUgWEYRkQMrPAvNx3UHhci\nUQQickhEDojIgyKyz21bJSL3ishj7uvJbruIyOdF5KCI7BeRN0chg2EYRrMZGxoj2ZNfhjrZk2Rs\nqLYy1B/+8Ic555xzePTRR+nv7+fmm2+u6X6FROksfqeqPu05vhrYq6rXi8jV7vGngPOBM9ztrcAu\n99UwDKOlyTqER/eOMnVsioEVA4wNjdXsKP7a174WhXiB1DNq6CLgXHd/N/BDHEVwEXCbqirwIxFZ\nKSJrVfWJOspiGIbRENIb07GOEPIjKh+BAv8iIg+IyLDbdoqnc/81cIq7vw543PPeabctDxEZFpF9\nIrLvyJEjEYlpGIZhFBLVjODtqnpYRF4F3Csiv/CeVFUVEa3khqo6DowDbNq0qaL3GoZhRImqIiLN\nFiMQx8BSPZHMCFT1sPv6FPBt4GzgSRFZC+C+PuVefhg41fP2frfNMAwjdixdupSZmZmaO9t6oarM\nzMywdOnSqu9R84xARE4CulT1OXf/D4DPAHcBW4Hr3dc73bfcBXxcRO7AcRIfM/+AYRhxpb+/n+np\naeJsol66dCn9/f1Vvz8K09ApwLfdaVM38A+qeo+I/AT4hoh8FJgELnav3wNsAQ4Cs8BHIpDBMAyj\nLvT09HDaaac1W4y6UrMiUNVfAm/waZ8BhnzaFfhYrZ9rGIZhRINlFhuGYXQ4pggMwzA6HFMEhmEY\nHY4pAsMwjA7HFIFhGEaHY4rAMAyjwzFFYBiG0eGYIjAMw+hwTBEYhmF0OKYIDMMwOhxTBIZhGB2O\nKQLDMIwOxxSBYRhGh2OKwDAMo8MxRWAYhtHhmCIwDMPocEwRGEVs++42uj/TjVwndH+mm23f3dZs\nkQzDqCNRLFVptBHbvruNXft2LR7P6/zi8c737myWWIZh1BGbERh5jD8wXlG7YRitjykCI495na+o\n3TCM1icyRSAiCRH5qYh8xz0+TUTuF5GDIvJ1Eel125e4xwfd84NRyWDUTkISFbUbhtH6RDkj2A48\n4jn+LHCDqp4OPAt81G3/KPCs236De50RE4bPGq6o3TCM1icSRSAi/cB7ga+4xwK8C/ime8lu4H3u\n/kXuMe75Ifd6IwbsfO9ORjaNLM4AEpJgZNOIOYoNo42JKmroRuCTwCvc4z7gqKqecI+ngXXu/jrg\ncQBVPSEix9zrn/beUESGgWGAgYGBiMQ0wrDzvTut4zeMDqLmGYGI/CHwlKo+EIE8i6jquKpuUtVN\na9asifLWhmEYhocoZgS/B1woIluApcArgZuAlSLS7c4K+oHD7vWHgVOBaRHpBlYAMxHIYRiGYVRB\nzTMCVb1GVftVdRC4BPiBqqaB+4APupdtBe509+9yj3HP/0BVtVY5DMMwjOqoZx7Bp4A/EZGDOD6A\nm932m4E+t/1PgKvrKINhGIZRhkhLTKjqD4Efuvu/BM72ueYl4ENRfq5hGIZRPZZZbBiGEUcyGRgc\nhK4u5zWTqdtHWdE5wzCMuJHJwPAwzM46x5OTzjFAOh35x9mMwDAMI26MjuaUQJbZWae9DpgiMAzD\niBtTU5W114gpAsMwjLgRVE2hTlUWTBEYhmHEjbExSCbz25JJp70OmCIwDMOIG+k0jI9DKgUizuv4\neF0cxWBRQ4ZhGPEkna5bx1+IzQgMwzA6HFMEhmEYHY4pAsMwjA7HFIFhGEaHY4rAMAyjwzFFYBiG\n0eGYIjAMw+hwTBEYhmF0OKYIDMMwOhxTBIZhGB2OKQLDMIwOxxSBYRhGh1OzIhCRpSLyYxH5mYg8\nJCLXue2nicj9InJQRL4uIr1u+xL3+KB7frBWGQzDMIzqiWJG8DLwLlV9A/BG4DwReRvwWeAGVT0d\neBb4qHv9R4Fn3fYb3OsMwzCMJlGzIlCH593DHndT4F3AN9323cD73P2L3GPc80MiIrXKYRiGYVRH\nJD4CEUmIyIPAU8C9wH8AR1X1hHvJNLDO3V8HPA7gnj8G9Pncc1hE9onIviNHjkQhpmEYhuFDJIpA\nVedV9Y1AP3A28DsR3HNcVTep6qY1a9bULKPRAWQyMDgIXV3OaybTbIkMoyWIdIUyVT0qIvcB5wAr\nRaTbHfX3A4fdyw4DpwLTItINrABmopTD6EAyGRgehtlZ53hy0jmGhq3yZBitShRRQ2tEZKW7vwx4\nN/AIcB/wQfeyrcCd7v5d7jHu+R+oqtYqh9FGVDOyHx3NKYEss7NOeytgsxmjiUQxI1gL7BaRBI5i\n+YaqfkdEHgbuEJG/An4K3OxefzNwu4gcBJ4BLolABqNdqHZkPzVVWXucsNmM0WSkFQbjmzZt0n37\n9jVbDKMRDA46HWEhqRQcOhT9++JAK8tuxBoReUBVN5W7zjKLjXhR7ch+bAySyfy2ZNJpjzutPJsx\n2gJTBJ1EK9ihBwYqa8+STsP4uDOKFnFex8dbw7RS7Xc2jIgwRdApZO3Qk5OgmrNDx00Z1DKyT6cd\nU8rCgvPaCkoAWns2Y7QFpgg6hbhH1WRnK5ddBsuWQV9f643sq6WVZzNGW2CKoFOIsx26cLYyMwMv\nvgi3395aI/taaNXZTBS0gsmyzTFF0CnE2Q4dNFv54z9ujjxG42gVk2WbY4qgU4izHTpoVvLCC7Bt\nW2NlMRpL3E2WHYIpgk4ha4fu89T3W7asefJ4KTUrGR9vnBxG44mzybKDMEXQabz4Ym5/ZiYe0/BS\ns5L5+cbJYTSeOJssOwhTBJ1EXKfh6bTjKPQjkWisLO1AKzlf42yy7CBMEXQScZ6GBzmGszV3jHC0\nmvPVQmdjgSmCTqIR0/BqR6M7d8LISG4GkEg4xzt3RidbJxDXWV8pOjl0NiaYIugkKp2GV9qp1zoa\n3bkTTpxw3nvihCkBqPxvEOdZnxFfVDX221lnnaVGRExMqKZSqiLO68RE8HXJpKrTLTtbMln6+kQi\n//rslkrV6cu0OZX+DVSd39r+BoYLsE9D9LFWhtrwp5LSyIX19AsRcab9RmUE/Q0SCdi929+E4ve3\nSCbN7t6hWBlqozYqMTH42aW9WChgdQT9Debng01u5nw1qsAUgeFPJY7lUvZnCwWsnlIKtJQD2Jyv\nRoWYIjD82bLFGVF6CerUgzqsRMJGo7Xg59z3Yg5gIyJMERjFZDKODdrrPxKBrVv9O/WgaKQgO7YR\njqyZJyiprhkmt1ZKVjNCY4rAKMbP5q8Ke/b4X2926fqRTjsKNQ7Zt62WrGaEpuaoIRE5FbgNOAVQ\nYFxVbxKRVcDXgUHgEHCxqj4rIgLcBGwBZoHLVfXfS32GRQ01mK6u/NlAFov+aR6ZjKOgp6acmcDY\nWOMVbSWRZEYsaGTU0AngT1V1PfA24GMish64GtirqmcAe91jgPOBM9xtGNgVgQxGlFghsPgRBwew\nJau1LTUrAlV9IjuiV9XngEeAdcBFwG73st3A+9z9i4Db3HyHHwErRWRtrXIYEWKFwAw/bIDQtkTq\nIxCRQeBNwP3AKar6hHvq1zimI3CUxOOet027bYX3GhaRfSKy78iRI1GKaZQjKpt/rY5Fc0zGCxsg\ntC9h0o/DbMBy4AHgA+7x0YLzz7qv3wHe7mnfC2wqdW8rMdGCVFMeIcr3G/UhbIkSIxYQssREJDMC\nEekB/hHIqOq33OYnsyYf9/Upt/0wcKrn7f1um9FOVFIF02/kX0sVTZtJ1I84+CqMyKlZEbhRQDcD\nj6jq33lO3QVsdfe3And62v9IHN4GHNOcCcmIkMyBDIM3DtJ1XReDNw6SOVDHDrGw8/WLLoFix2JQ\nSGLY9/vJYSGOhlERUcwIfg+4DHiXiDzobluA64F3i8hjwGb3GGAP8EvgIPD3gK1OXgcyBzIM3z3M\n5LFJFGXy2CTDdw+XVgbVjqT9Ot/CrOQsXV3hRv7VJlG1Yj1+w2gyVn20TRm8cZDJY8Wj6tSKFIeu\nOlT8hlqqVgbNAET88xG89y9VrK7wfBh5LAfCMBax6qPtTpnR+9QxfxNKUHtNI+kgc41qLvLIb4Rf\nauSfjVSqNHLJQhwNo2JMEbQiIezgAyv8O76g9pqShYI62WzG6cJC8Gh8fh56evLbenpymbOVOiYt\nxNEwKsYUQSsSYvQ+NjRGsie/Q0z2JBkbCugQaxlJh+l8V63yf29fX7E/YW4Otm+vzsFrdY8Mo2JM\nEbQiIUbv6Y1pxi8YJ7UihSCkVqQYv2Cc9MaADrHWkfSyZbn9vr78zjeTgeeeK35PdiZw/HjxuZmZ\n6qN9LMTRMCqiu9kCGFUwMODvnC0Yvac3poM7/kKynWWlhc38nMwvvph/zeiof2c/P+90+EHMzjql\nr73yGYYROTYjaEXqZQcvNZIOck6HcTIHzWAWFoLDTLOUWpbRMIxIMEXQitTTDu7X4ZdyTodxMpfy\nM6iWVwZR5gFY1rFhFBOmDkWzN6s11CCC6vv09eW3ZbdUytmCzpW6b+G2fHnp8yL1+35+9XKspo7R\nBtDIWkNGmxBk5gmy409N+ZupRJxZQ3bEnU7DOecEf25XFyxZUlq2gYHaR/NhcyWsTIXRYVhmsZEj\nKCs3iGyeQLZURLa0hPceyaTTaT/8cPVyJZOO03j37uKOvK8PbropnFksbNaxrcRltAmWWWxUTqlY\n/66CR6Wry5kNeJdQTCSKO9rZ2dqUQCLh+D/27PEvR1FJmGnYXAlbicvoMEwRdAJhTCqZDPzmN8Xt\nvb3wxjcWZwYvLMCtt+abUObno5U7mXRmAel06U44rDM5bLSVlakwOo0wjoRmb+YsroFyDtKsUzTI\nSdvXp5pIlHbkRrX19QU7aEvJmN3COHTDOIFtURyjTSCks7jpnXyYzRRBFZTr4LOdYLloHpHGKIFy\nHe3EhGpvb233qOb3s6gho4UJqwjMWdyO+GX7+pFIlDfn9PXB0aPRm32yiITPYl69unQmMphD1zA8\nmLO4kyj0AWzfXl4JiITr3GdmoLtOlUhSqcrqAT3zTPlrKnXoWoKZYZgiaDkKO65t24pj3suNmsst\nGFPIyy87yqBcBnAhfX2lz1daEiOMs7YSh67lCxgGYIogOrZty3WW3d3OcT3ufeml+R3Xrl3lR/9e\n+voqUwJZTpxwOllVmJgoXkOgkGQSLr64OEony8RE8Syg3OjcL+rHS3Ydg7DYspaG4RDGkdDsLfbO\n4pERf+flyEhNt53YP6Gpv1yusgNNXYVObKzRGTsyUt7hWm5bFG6iuPRE1rGcda6GKT/hvV+5SB2/\nzyyMOqqEMN/TMFoYGhk1BNwCPAX83NO2CrgXeMx9PdltF+DzOIvX7wfeXO7+sVcEQeGViUTVt5zY\nP6HJsaRyLYtb8s8rUAZ+oZhhQjBLbV1dlUXSBEUc+dUNKqc0wkY4VUId/m6GB4u8ajphFUFUpqGv\nAucVtF0N7FXVM4C97jHA+cAZ7jYM7IpIhuYR5HStIdJmdO8os3P5ZovZXhgdCnmDrJ/g9tsdc0m2\nBEQtLCxUZk8PsterOmau1atz73edvJmNMHgVdO1wXjOvdGX2M+OE/bwg6vB3M1zM/9JSRKIIVPX/\nAoUhHRcBu9393cD7PO23uQrrR8BKEVkbhRxNI2gB9qD2EAQuPr+igptMTjo+haxfIWrK2dPL2fRn\nZuCKK5zOYWCAzEYYvgAmV4KK8zp8kZA5UKLcdZZq1mNIpSprN8Jj/peWop7O4lNU9Ql3/9fAKe7+\nOuBxz3XTblvrMjxcWbuXAAdp4OLzx6oTsW5kC82JwMknF5/3LmHpx/HjTucwNsboZmG2N//0bLcy\nune09Gi/2vUYbKH7+mH1mlqKhkQNubaqikJVRGRYRPaJyL4jR47USbKI2LkTRkZyM4CuLjjpJPjS\nl0rHppeYPvsuPn8cxn4gzmcVFoGLmpNOqvwzjh51OtLVq3MRTuVCWcH53pddxtQr/R+RqWMlyl2P\njFS/LnE7LnQfl7wIq9fUWoRxJITZgEHyncWPAmvd/bXAo+7+l4EP+10XtMXeWeylkjo1ZRykE/sn\nNHVDSuVa0dQNKZ3Y77lHLU7fUlsikYt2alR5CXdLXUWeczy7pf4s4chy0knF77MaQDniVCMpTrJ0\nMDS6xISIDALfUdXXucefA2ZU9XoRuRpYpaqfFJH3Ah8HtgBvBT6vqmeXundLlZiopJZ92Pr4flSa\n3FUNYUpQREjWR+A1DyWPw/jdkD5Q4o1WVsIhbusoeEuUhy0jYkRK2BITkSgCEfkacC6wGngS2AH8\nE/ANYACYBC5W1WdERIAv4EQZzQIfUdWSvXxLKYJKOvda/nGrVQS1du7JpGM+ufTS6u9RgsxGJzJq\naoXjDxnbW0YJQDjF2QnUMrAw2pKwiiAy01A9t5YyDUWYRBVoGpqYqM5sU25d4HJbX19uat9Ak1HZ\nbcmShvxpY08lz57REWBrFjeJSiJRSjgrMwcyDN89zOSxSRRl8tgkw9+4jMzrBS67zH/k52HDlSA7\nctuGK4Hnn6/tu83MODOBehWhq5aXX4bNm5stRfOxKCijSkwRRE2lkSjptGMGKqjC6ZtQ1q1OQlkI\nJfDwKTg53O728CmuMoiCOCZc7d1ryUrtGAVlNARTBPUgoHOvhFoSyhaVgBdXGbQ1nZC5msnkwnML\ns7MhkmfP6DxMETSbCBPKsuagjqXdM1czGScT25ubMTMDH/lI+ytAo66YImgmQQll27YxdufzJI/n\nX5487kTR+FFkDmoHRkbKr2lQyNRUfJKqomZ01MnELmRurr0VYDNp12epAFuqspkEhY+6C8dUEkop\nOyitABTWPwkPfSkCuRtBXx88/bSzH3bpzez7Xnwx/9psyGurm0mCwkPBQkTrgd9z12LPki1V2QoE\nFYJz/9nTB+DQjbBwnfNaNp6+DC2jBJJJuOmm3LHXCVrufdC+xc5KlWcIOtchI9q60EGF80wRNItM\npjHZwXGlp8dZpUzVeS0X6ZJOO/WPgsi+L2hdY2+xs1btHMfGoLe3uD1oZTYrBV0bHVQ4zxRBlFTS\nwYyOlg0DrYT1T0JlZf2azNwc/Ou/OvveSJfs2gl+v+HDDwffLxshU67YWSt3juk03HJLvt+krw9u\nvdXfVNFBI9q60EmF88JknTV7a4nM4lJZwn4rNVWQGTxyPpr4S5QdzuvI+f7Xrb/SuYYdBYXbdgS/\np6mbSPFSlKUKlZW6V9h7dFL2bSUrxBnFtEHhPBq5VGW9t5ZQBEEdTF9f8TrB3d2hO8uR8yvv2MMq\njlhs3qUog5aOzF4TRhFk7xW0RGIndY6dpPTqRYsvtxlWEVjUUFR0dbHtPGV8E8x3QWIB5iHP+NYz\nB8f/urLbdv8lzPssdJaYhxP/sxaB48G282H8rV3M6wKJBRjeBzv/ueCibERMKZ+K33PsV/0yaMnO\ndqxg2gZRL0ZtWNRQg9n2oSS7znY7bXFfu8gr8zDXA71/XuIe50PXp3P1gV5xjaNU/AhqD0PRusAb\nq79XLWw7H+c3Y2HxN9t1ttOeR9YmW8nSkkG+gC1bOqcej5WcMEJiiqBaChzD42fO+pZ1KDye6/G/\nXbZTVI/yeH5J8McnqgwZ910X+ILmKIPxTfj+RuPe8Yu3k66kqNr27f6O0j17OqtztJITRghMEVSD\nz2hzvoqQnZM/kRv97zob/4QwgaJbq2NCqYbRIYrXBe512htN2dlOIpHfSYcd4WYywUtkTk0Vd47Q\nGuGkrRr2asQeUwRZKg39LBhtVjpCP/kTcDRJnumoFIl5QJ3XkR/72NFDElS0Lkwxu1opNElJgO5M\nLOCM9Hfv9s8n8HTimdfD4I2DdF3XxeCNg2QOZEqHRxaG/mUyTq0erwkpjrV7Wjns1Yg95iwG55/p\n8svhxIlcW3c3fPWr/lNpn1T/rGmnXJmHPELmk6WOOpnFUTB4lWMOqudn+JE5t4/hdz7HrOZq5XTP\nw4msKSyLwsgjJ7Hzwi8HmzFcJ3DmlZMMXyTMdud+2GRPkvE7ZoOzsEdGHPNQ1oH89NPwwgvF13lL\nXDSbTAa2bvUv/92OTm4jMsxZXAlXXpmvBMA5vjKggL9PQsnOf4blPvXAAEcBKKFH/16654MLzVXD\n2F7CF7NbssRRerXgZhCPXrQ8TwkAnEg4v1nRbOfCLwcnlXlGxqND5CkBgNm5WUbf4xNmBbB8uTPL\n8I6q/ZQABJuWGk32+watAdGGWa5G4zFFAMErdwW1+zktgRd8sv8XKedIzqK5bfnL8NV/qr3GUJZs\nEbvZnlznmzpasDh8KpWLOH/pJbjtNt/vGhp35hS0vsILvU4YrF7nvO78XldpE4jHLBdo5lo+7+9U\nXrIkXOG6OOGXHeyl2ixX8zcYHkwRVEM6Tea3ZotCMEutFeCLjxN45azTKep18NzfRKsEstFC2VDN\n5FxxRdPMp7bk29xfT37Bt0rrI504Adu3h19fYdmy0mURPCPgoN97YEXK36kcVIfIj0rLX9eLUiP+\nasNezd9gFNA0RSAi54nIoyJyUESubpYcrjDh2t1RVOb14huCueVRf7NLSTwzAHAcyLLDcSZXyrbz\nnQQ02eG8euPxw0QLZTbC8JGb89dJvnvYUQaHDjmdxu2353ewYZiZYWxojGRP/ig9udDN2A8KfuMg\nU022Q/SMgH3NXD1JxobG/MMmw46ee3vzq582kyCZCyOqKqHaGkRRzCI2bwYRMq8XBv+H0HWt5Jz8\nRtNoiiIQkQTwReB8YD3wYRFZ3wxZAFi71r/d6yMotE37dKp7XuuYWVJHnYiYrNnFtyCcOu16nTML\nAPJ8CFmFUC7hK/EX+SGo3oS2XWc7SWmZjeGihUaHKLLjz87NMnrb1tw/fWEHG1IZpDemGb9gnNSK\nFIKQWpFi/INfJf3J28OZnrIdoscslz7g/t7HBAHnnheMk94Y0DkG5SGMjOQrt1tuqayDraeZJUhm\nv4iqsFRTVTOKWcTmzbB3b3EuS3bAYcqgaTQlakhEzgGuVdX3uMfXAKjq3/hdX9eoIffhLOLVr4bD\nh3PHPT2LDuWuHc4DXIios3aAH4sriLl4F4kpt6hM8niBHd8l8RewkCj93uz7l83BjF8VZ82Fo5b8\nXp8LKE0QdtGYoOcsaHGevC9Q8Nl+pSPCdoq1vDfofvUu4xC1zEG/eakIpGreU4g7ww6MXFuR4tBV\nIe9lhCLuUUPrgMc9x9NuW+PxUwIA//mfuf0NG/KiigJt0yV8BA99KWf71+sqWyQmKOErjBLIvh8C\nzFSSK+tQ8nt5TQfeEfDoqBPaWM6mfvLJ/u1TU+VLXoyPOyWru7udzmTrVqdURDXZslFn2tZS6nnb\nttx36u5zCLICAAARhklEQVR2jhshcyUZ2lkirM0fODsNCCgw6k9sncUiMiwi+0Rk35EjR5orTEEd\n/IpCMCNickXOBCQ7nBlGJTyTdGYVvgnQblmHLY/6nFe3HZwR4fLlxQlYX/kKPPdcaQGOHvVtzvz+\nqiJ/y6UfcL/np3Fs4bfeCrt25UIo5+ed46COs174mYCq7SA3b27ed6qmBlGEtfmDBhxd0pWfGGg0\njGYpgsPAqZ7jfrdtEVUdV9VNqrppzZo1DRWuHIu26QJfQLURPitn8e+gC/H4ELxmpjAMHCst33yX\n4+PwC3Pd81rP8QsvOIvKeJmb819UPQSjm4v9Ld7vKaPzwbO28fGS984cyBRnHVdLkI181Sr/60t1\nkJlM8Hf68peLmjZ8cQNynSxuG764oYovUECls4xqZhGFDDnTWr+BFMC8zucHKZgyaBjNUgQ/Ac4Q\nkdNEpBe4BLirKZIMBRTZCWp3iXI94Wc/51EGngiiPILyEHxG8IVtebOVIIWjDSg/4eNYnDpRIqSz\nXPJdUJIVjhIYvns4PwLqjkvJvHO1I0elDt4gExCE6yC9n7d1a/DnLCzkybLhixt4+On8GenDTz8c\njTKohCgqmX7/+zA0VDSQSkhxAuDs3Cyje20ltUbRFEWgqieAjwPfAx4BvqGqDzVDluzDmcfQkNO+\nbl3D1hV+9nM5/8HEtwg3QwC63MSwouxlnOO+FwpmK0FfR6rzfVSEN8rE7RgHjtYYrBDQkY/uHWV2\nLr/jnu2F0TfOOKatK64IFwGT7cCDHNrPPFO+gyycTZRQYI7wuQ6wUAmUa68rUfgqvv99UCW9Xzl0\ng7JwrbKg/oW6zGfQOJrmI1DVPar626r6W6ra3GLw7sO5uGWVgNdh3EAqmV3M/5WjPFLH8J01LJ8L\nf7+qfR+JgJIOhczOwqWXOh3mpZfC5GSgmSA02Y780kvzFnYP6kSmVuBvypqddUpXe/F24EEMDOQ6\nyNtvd9ouuyxfOZXLDi4SMp4dYKSmNg+ByYYB7Ub0xNZZ3HB6e50OKrs1UAkULkjT9ekQb3LzEMCJ\nspmMwKxTte9jpU8sYEg+fp5T8sLXJBZkJgtibm7RTBM6k9lD5tUzDI6tznV0X/FZ08CL1wRUKs6+\n0o49pAM2bGccRQfua2qLyI7vm2yYTQw0GkLnVh9twog/W+tnaoXTIY3thX/tD1G11Iv758rmIWST\nc4ocri6po87nZD+3az6XdOa9pzevoVHkleL2yOLd189UcWPVxY7Lax4KyscA/9+x1PWkUvnx/KXi\n7MH/XFeXY2bxUpCD4OcjyLu8J7mYRJc5kGF07yhTx6YYWDGw2JEW/Q49Sba+YSt7HtuTd603Ea/w\nXpNHJ32f0ahi//1kD0wMNEITNo+gMxVBnZSAX0ef7USCOpqXumEh5LxMFuD2b+d3TEHJOdn7b/0p\n7H5TgaIo+JMn5p2chEKZo8Tvt7n0A/grQHXMXdXcM32AxeS1zIEMo3dtZ2puJv98T09R5FNF5bn9\nkqi6usi8Tovl+bk4JqOCpLPMWT2Mvv+VTJ2YYeD5BGPfmyf9m5Rvslg5ZZBakWJsaMy3w1/WvYyZ\nF4srqQqCeh6EQoVSeK9F/5PPfRZ2VLlcnlF3TBGUog4O4HIjysAOO+AfrPCaVEAnHZQNjDpO59Gh\nYEXRe8L5+Lluf5n9OswgklfDi56lNZe9DLPXO/tBv81sD+EVQXYBe4BMhsxnLw3+vfcXPNN+mbnb\nt+eVmg6dLR6QNZx552qGf3emWJ5/6yN939N5MmR+f1XR2gzejjgIuVYCf6/UyhSTx8pkaJchO7of\nvHEw9L06JRu4VWcscc8sbjvKFXULtNWH0EmpY8EhqkE275SbN1DKR3C8O18JQEEWc8hBwqIS8MT/\nv7jEaYfg36YiVJ3s282bYfv2wHtufT/+duvnn8/Z7rdvh4svzgv7DIyY6unLmXcSiVzWcGGUkk8+\nxGyv0w7kRdz4rc0wOzfL6F0FzuoKiCLCZurYFJkDmfAKReH548+3fbx/Pf0jccEUQUSUi8GvNgSz\nXNROuUifaj538bt4ymp4y0Cs/oSzZUtCLCoBL5KbIUSWhzDvJpfNzATec76L/H/STMYJFfUuNDMz\n42RDb926GPY59mAfScnvyZM9ScYuvCmXTJUN+/QJNw3Kh/BrD4xompupumDdwG/8RxR9y/qQkA6o\nVctWMXz3cPgPFZh5cabtOsVCfEOR2yzPoTMUQSZD5p2rc2Vvy1T0rIZyMfihwiTdKBlZwH/RGB/K\nRfpUE55Z+F0Kq0XOnORs2ZIQld4vCkrdc3F0PTjohJX6ZT3PzTlLVrqj9PR9TzP+/lvyq6OevJX0\nBaPOPfySyTzhppWEQJaMaAqoUbThqtJTqLF7F3xLct90/k15voAgslE7hR1eGNqtUywkUHG3UZ5D\n+yuCTIbMDR9h+HdnitYPiFIZlBuZezvsoP/L1DHHNr7wGec1bMZyqSzn7Of2vVD8ub0noKdghU6/\nGYifGSaPMgPOoN+mFsopuKm5mfJVTQvCOtMb0xy66hALOxY4tGaM9J/tLn2PmdwIvpIQyLGhseBn\nJSDU9OGVc4H+gWUv+wwInk8s+hxSK/xLhSckkVN6F4zzzIsVLNxTQDt1ioV0Qp5D+yuC0VFG/+tc\n2UVZaiVMDH62w574VmOL1qUPwNOfcz7XK98td8Ktd5bPGwhl2vHJAVj2cu7z/X6bZS+Xfl+57zR+\nt7vkpg+hZiGl4vXDJoG5I3jf9RYCnL/pjWnG/63P/3evoohb1imfNyD4XwuLnx2kpHa/f7ej9K46\nRHpjumzHVsrE1E6dYiGdkOfQ/lFDXV10fVorXj8AETjzTHjkkdBO00ooFWoaN0qFqC6yQN6I1Rs1\nVIpS0UZhqDj+P0tPj1PVNKhMQldXuL+7N5qpEipcx6BUxJBvqG1BiGuYqBffsFGXVy9/NYf/9LB/\nfkaIiKdWp92jhlDV2G9nnXWWVk0qpamrUK4t3lI3pHLXTUyoplKqIs7rxER+O6gmEt5CFO299fYu\n7k9sRJN/7v8bcq1zbmJjifv09NRPzr4+ndg5oqkbUirXiqZuSOnEuX1l37P49y3x3IT6/FSq9H1K\nEfTM+bB+e4+yo+C334GuvzL/b6WgmkyW/35BIu2fyP8t9xffJ8w1LUEFv3+rAuzTEH1sxZ1yM7aa\nFMHEhE6c1VPUkSWv663+AR4ZySmFRMI59hLUaXR1Oa8i9esYCzevnMuX+19TKE+2IxkaWmyb2Iim\nPtGtsgPt+4SzyQ40dVUJJZDtcL3/cCedFE7uML9RUCc8MeF8h1o6R797FG41dLgVMzGh6690Ov/s\ntn57T07WNu/QIieKZ6QFMEXgZWJCJ87t09RVbuf1V33NHcUU/uOG7dRFnM51yRL/TtNPKRV+rt/D\nPzISviPxzpC8ymZoKPw9vIq03Mi93O8R9jeu5h+88B6V/E5R0iGdVkMJ+r+rZYYXQ0wRGP7EbfRY\n6h9yYqLY7NHsf9pys8F6UO9OK27PRJ2Z2D+RGxQWzmhLDS5aEFMERmtQbrQ7MeE/M2jGiHhkxL9D\nrrcyCDKTRdFpddhsY2L/hCbHksE+rg6dEbR/+KgRb8qtfJVOw9NPw8REbatjRUHQ0phllsysmaDl\nMIPaKyFo5bWAxLZWJ3DBoiEqX3qzjWj/8FHDiIpSxQrr+X+0enV+iYwsfX2OkqyFoDDZasNiY07X\ndV2+mdaisHDGROMHF3XGis4ZRtQErcQWdoW2ankmIOM3qL0SghLYqkhsawUCs4RXppydStaxbiNM\nERhGWIYDCrIFtUdFPTvrbEE9L21sIgnMEl6yJXiFuQ7AFIFhhGXnThgZyc0AEgnneOfO+n5uPTvr\ncj6aNiOwFMhn93SUr6SQmnwEIvIh4FrgTOBsVd3nOXcN8FFgHvjvqvo9t/084CYgAXxFVcsWFDAf\ngdHx+C2u06addVNoU19JWB9Bd7kLyvBz4APAlws+fD1wCbABeDXwfRH5bff0F4F3A9PAT0TkLlUN\nXofPMAyn07eOv34EDYhbIJgmCmpSBKr6CIAUR1NcBNyhqi8DvxKRg8DZ7rmDqvpL9313uNeaIjAM\nw2gS9fIRrAMe9xxPu21B7YZhGEaTKDsjEJHvA//F59Soqt4ZvUiLnzsMDAMMtGkom2EYRhwoqwhU\ndXO5a3w4DJzqOe532yjRXvi548A4OM7iKmQwDMMwQlAv09BdwCUiskRETgPOAH4M/AQ4Q0ROE5Fe\nHIfyXXWSwTAMIxzmLK4eEXk/8L+BNcB3ReRBVX2Pqj4kIt/AcQKfAD6mqvPuez4OfA8nfPQWVX2o\npm9gGIYRBR3S6fthtYYMwzDaFKs1ZBiGYYTCFIFhGEaHY4rAMAyjwzFFYBiG0eG0hLNYRJ4DHm22\nHBWyGqhx1ZCGYvLWn1aTudXkhdaTud7yplR1TbmLai061ygeDeP5jhMisq+VZDZ560+rydxq8kLr\nyRwXec00ZBiG0eGYIjAMw+hwWkURjDdbgCpoNZlN3vrTajK3mrzQejLHQt6WcBYbhmEY9aNVZgSG\nYRhGnYidIhCRz4nIL0Rkv4h8W0RWes5dIyIHReRREXmPp/08t+2giFzdYHk/JCIPiciCiGwqOBc7\nef2ImzwAInKLiDwlIj/3tK0SkXtF5DH39WS3XUTk8678+0XkzU2Q91QRuU9EHnafh+0tIPNSEfmx\niPzMlfk6t/00Ebnfle3rbqVg3GrCX3fb7xeRwUbL7MqREJGfish34i6viBwSkQMi8qCI7HPb4vdM\nqGqsNuAPgG53/7PAZ9399cDPgCXAacB/4FQwTbj7rwF63WvWN1DeM4HXAj8ENnnaYymvj/yxkscj\n1zuANwM/97T9LXC1u3+159nYAvwzIMDbgPubIO9a4M3u/iuA/+c+A3GWWYDl7n4PcL8ryzeAS9z2\nLwEj7v424Evu/iXA15v0bPwJ8A/Ad9zj2MoLHAJWF7TF7plo+B+xwh/x/UDG3b8GuMZz7nvAOe72\nPU973nUNlLVQEcRaXs/nx0qeAtkGCxTBo8Bad38tTn4JwJeBD/td10TZ7wTe3SoyA0ng34G34iQ4\nZQdji89H9hl297vd66TBcvYDe4F3Ad9xO804y+unCGL3TMTONFTAFTgaElpvHeRWkTdu8pTiFFV9\nwt3/NXCKux+r7+CaIN6EM8KOtcyumeVB4CngXpzZ4VFVPeEj16LM7vljQF9jJeZG4JPAgnvcR7zl\nVeBfROQBcZbfhRg+E03JLJYQ6yCLyCjOojaZRsrmRxh5jcaiqioisQt5E5HlwD8CV6nqb0Rk8Vwc\nZVZnwag3ur64bwO/02SRAhGRPwSeUtUHROTcZssTkrer6mEReRVwr4j8wnsyLs9EUxSBllkHWUQu\nB/4QGFJ3jkQE6yBXSzl5A2iavBVSSs648aSIrFXVJ0RkLc4oFmLyHUSkB0cJZFT1W25zrGXOoqpH\nReQ+HNPKShHpdkfRXrmyMk+LSDewAphpoJi/B1woIluApcArgZtiLC+qeth9fUpEvg2cTQyfidiZ\nhkTkPJyp34WqOus51WrrILeKvHGTpxR3AVvd/a04dvhs+x+5URdvA455pt4NQZyh/83AI6r6d55T\ncZZ5jTsTQESW4fg0HgHuAz4YIHP2u3wQ+IFnoFZ3VPUaVe1X1UGc5/QHqpqOq7wicpKIvCK7jxMI\n83Pi+Ew00nES0rlyEMdO9qC7fclzbhTHhvkocL6nfQtOlMZ/4JhrGinv+3FseS8DT5LveI2dvAHf\nIVbyuDJ9DXgCmHN/34/i2Hf3Ao8B3wdWudcK8EVX/gN4nPYNlPftOPbg/Z5nd0vMZX498FNX5p8D\nn3bbX4MzaDkI/B9gidu+1D0+6J5/TROfj3PJRQ3FUl5Xrp+520PZ/604PhOWWWwYhtHhxM40ZBiG\nYTQWUwSGYRgdjikCwzCMDscUgWEYRodjisAwDKPDMUVgGIbR4ZgiMAzD6HBMERiGYXQ4/x+Pg9kt\npmkfOwAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7fcf9ebb2588>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%matplotlib inline\n",
    "from matplotlib import pyplot as plt\n",
    "classes = set(y)\n",
    "colors = ['red', 'green']\n",
    "for cur_class, color in zip(classes, colors):\n",
    "    mask = (y == cur_class).values\n",
    "    plt.scatter(Xd[mask,0], Xd[mask,1], marker='o', color=color, label=int(cur_class))\n",
    "plt.legend()\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 创造自己的转换器"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.base import TransformerMixin\n",
    "from sklearn.utils import as_float_array\n",
    "\n",
    "class MeanDiscrete(TransformerMixin):\n",
    "    def fit(self, X, y=None):\n",
    "        X = as_float_array(X)\n",
    "        self.mean = np.mean(X, axis=0)\n",
    "        return self\n",
    "\n",
    "    def transform(self, X):\n",
    "        X = as_float_array(X)\n",
    "        assert X.shape[1] == self.mean.shape[0]\n",
    "        return X > self.mean"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "mean_discrete = MeanDiscrete()\n",
    "\n",
    "X_mean = mean_discrete.fit_transform(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "#%%file ./data/adult_tests.py\n",
    "import numpy as np\n",
    "from numpy.testing import assert_array_equal\n",
    "\n",
    "def test_meandiscrete():\n",
    "    X_test = np.array([[ 0,  2],\n",
    "                        [ 3,  5],\n",
    "                        [ 6,  8],\n",
    "                        [ 9, 11],\n",
    "                        [12, 14],\n",
    "                        [15, 17],\n",
    "                        [18, 20],\n",
    "                        [21, 23],\n",
    "                        [24, 26],\n",
    "                        [27, 29]])\n",
    "    mean_discrete = MeanDiscrete()\n",
    "    mean_discrete.fit(X_test)\n",
    "    assert_array_equal(mean_discrete.mean, np.array([13.5, 15.5]))\n",
    "    X_transformed = mean_discrete.transform(X_test)\n",
    "    X_expected = np.array([[ 0,  0],\n",
    "                            [ 0, 0],\n",
    "                            [ 0, 0],\n",
    "                            [ 0, 0],\n",
    "                            [ 0, 0],\n",
    "                            [ 1, 1],\n",
    "                            [ 1, 1],\n",
    "                            [ 1, 1],\n",
    "                            [ 1, 1],\n",
    "                            [ 1, 1]])\n",
    "    assert_array_equal(X_transformed, X_expected)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "test_meandiscrete()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.pipeline import Pipeline\n",
    "\n",
    "pipeline = Pipeline([('mean_discrete', MeanDiscrete()),\n",
    "                     ('classifier', DecisionTreeClassifier(random_state=14))])\n",
    "scores_mean_discrete = cross_val_score(pipeline, X, y, scoring='accuracy')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Mean Discrete performance: 0.917\n"
     ]
    }
   ],
   "source": [
    "print(\"Mean Discrete performance: {0:.3f}\".format(scores_mean_discrete.mean()))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  },
  "toc": {
   "colors": {
    "hover_highlight": "#DAA520",
    "navigate_num": "#000000",
    "navigate_text": "#333333",
    "running_highlight": "#FF0000",
    "selected_highlight": "#FFD700",
    "sidebar_border": "#EEEEEE",
    "wrapper_background": "#FFFFFF"
   },
   "moveMenuLeft": true,
   "nav_menu": {
    "height": "50px",
    "width": "252px"
   },
   "navigate_menu": true,
   "number_sections": true,
   "sideBar": true,
   "threshold": 4,
   "toc_cell": false,
   "toc_section_display": "block",
   "toc_window_display": false,
   "widenNotebook": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
